# Small warm-up simulation: 20 points generated from y = 10 + 3x + noise,
# where both x and the noise are standard normal draws, then fitted with a
# simple linear regression.
#
# library() is used instead of require(): require() only returns FALSE when
# the package is missing, so the script would fail later with a confusing
# "could not find function" error instead of failing here.
library(mosaic)

n <- 20

# x is drawn before the noise term, matching the original draw order so that
# results under a fixed seed are unchanged.
fake_data <- data.frame(x = rnorm(n)) %>%
  mutate(y = 10 + 3 * x + rnorm(n))

# Scatterplot of the points ("p") with the fitted regression line ("r").
xyplot(y ~ x, data = fake_data, type = c("p", "r"))

mod_ex <- lm(y ~ x, data = fake_data)

# Default set of lm diagnostic plots for the fitted model.
plot(mod_ex)

We’ve been talking a lot about the LINE conditions for regression, but we haven’t seen any particularly pathological examples. So, let’s examine some.

Good Model

# Baseline case: data simulated from y = beta0 + beta1 * x + e with
# x ~ Uniform(0, 1) and e ~ Normal(0, 1), then fitted with the matching
# straight-line model.
#
# n, beta0, beta1, x and e are deliberately left as top-level variables:
# the later sections reuse them to build their own data sets.
library(mosaic)

n <- 10000
beta0 <- 10
beta1 <- 3
x <- runif(n)
e <- rnorm(n)

# Store x in the data frame alongside y. Previously ds held only y, so
# `y ~ x` with data = ds silently picked x up from the global environment.
ds <- data.frame(x = x, y = beta0 + beta1 * x + e)

# Scatterplot ("p") with a thick fitted regression line ("r").
xyplot(y ~ x, data = ds, type = c("p", "r"), lwd = 5)

mod1 <- lm(y ~ x, data = ds)

# which = 1: residuals vs. fitted values.
plot(mod1, which = 1)

# which = 2: normal Q-Q plot of the standardized residuals.
plot(mod1, which = 2)

# Histogram of the residuals with a fitted normal density overlaid.
histogram(~residuals(mod1), fit = "normal")

Linearity

# Linearity violation: the response includes a 15 * x^2 term, but the model
# fitted below is still the straight line y ~ x.
# Reuses beta0, beta1, x and e defined in the "Good Model" section.
#
# x is stored in the data frame alongside y. Previously ds held only y, so
# `y ~ x` with data = ds silently picked x up from the global environment.
ds <- data.frame(x = x, y = beta0 + beta1 * x + 15 * x^2 + e)

# Scatterplot ("p") with a thick fitted regression line ("r").
xyplot(y ~ x, data = ds, type = c("p", "r"), lwd = 5)

mod2 <- lm(y ~ x, data = ds)

# which = 1: residuals vs. fitted values.
plot(mod2, which = 1)

# which = 2: normal Q-Q plot of the standardized residuals.
plot(mod2, which = 2)

# Histogram of the residuals with a fitted normal density overlaid.
histogram(~residuals(mod2), fit = "normal")

Constant Variance

# Constant-variance violation: the noise term is e * x, so its spread is
# proportional to x instead of being the same for every observation.
# Reuses beta0, beta1, x and e defined in the "Good Model" section.
#
# x is stored in the data frame alongside y. Previously ds held only y, so
# `y ~ x` with data = ds silently picked x up from the global environment.
ds <- data.frame(x = x, y = beta0 + beta1 * x + e * x)

# Scatterplot ("p") with a thick fitted regression line ("r").
xyplot(y ~ x, data = ds, type = c("p", "r"), lwd = 5)

mod3 <- lm(y ~ x, data = ds)

# which = 1: residuals vs. fitted values.
plot(mod3, which = 1)

# which = 2: normal Q-Q plot of the standardized residuals.
plot(mod3, which = 2)

# Histogram of the residuals with a fitted normal density overlaid.
histogram(~residuals(mod3), fit = "normal")

Normality

# Normality violation: the noise is drawn from Beta(2, 5) instead of a
# normal distribution. That distribution is right-skewed and has mean 2/7
# rather than 0, so the fitted intercept absorbs the shift.
# Reuses n, beta0, beta1 and x defined in the "Good Model" section.
#
# x is stored in the data frame alongside y. Previously ds held only y, so
# `y ~ x` with data = ds silently picked x up from the global environment.
ds <- data.frame(
  x = x,
  y = beta0 + beta1 * x + rbeta(n, shape1 = 2, shape2 = 5)
)

# Scatterplot ("p") with a thick fitted regression line ("r").
xyplot(y ~ x, data = ds, type = c("p", "r"), lwd = 5)

mod4 <- lm(y ~ x, data = ds)

# which = 1: residuals vs. fitted values.
plot(mod4, which = 1)

# which = 2: normal Q-Q plot of the standardized residuals.
plot(mod4, which = 2)

# Histogram of the residuals with a fitted normal density overlaid.
histogram(~residuals(mod4), fit = "normal")